In [1]:
pip install neo4j
Defaulting to user installation because normal site-packages is not writeable Requirement already satisfied: neo4j in c:\users\user\appdata\roaming\python\python313\site-packages (6.1.0) Requirement already satisfied: pytz in c:\programdata\anaconda3\lib\site-packages (from neo4j) (2024.1) Note: you may need to restart the kernel to use updated packages.
In [2]:
pip install py2neo
Defaulting to user installation because normal site-packages is not writeable Requirement already satisfied: py2neo in c:\users\user\appdata\roaming\python\python313\site-packages (2021.2.4) Requirement already satisfied: certifi in c:\programdata\anaconda3\lib\site-packages (from py2neo) (2025.4.26) Requirement already satisfied: interchange~=2021.0.4 in c:\users\user\appdata\roaming\python\python313\site-packages (from py2neo) (2021.0.4) Requirement already satisfied: monotonic in c:\users\user\appdata\roaming\python\python313\site-packages (from py2neo) (1.6) Requirement already satisfied: packaging in c:\programdata\anaconda3\lib\site-packages (from py2neo) (24.2) Requirement already satisfied: pansi>=2020.7.3 in c:\users\user\appdata\roaming\python\python313\site-packages (from py2neo) (2024.11.0) Requirement already satisfied: pygments>=2.0.0 in c:\programdata\anaconda3\lib\site-packages (from py2neo) (2.19.1) Requirement already satisfied: six>=1.15.0 in c:\programdata\anaconda3\lib\site-packages (from py2neo) (1.17.0) Requirement already satisfied: urllib3 in c:\programdata\anaconda3\lib\site-packages (from py2neo) (2.3.0) Requirement already satisfied: pytz in c:\programdata\anaconda3\lib\site-packages (from interchange~=2021.0.4->py2neo) (2024.1) Requirement already satisfied: pillow in c:\programdata\anaconda3\lib\site-packages (from pansi>=2020.7.3->py2neo) (11.1.0) Note: you may need to restart the kernel to use updated packages.
Import Libraries
In [3]:
from py2neo import Graph
import pandas as pd
from neo4j import GraphDatabase
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
Connect to Neo4j
In [5]:
# Connect to Neo4j over Bolt, specifying your database
graph = Graph("bolt://localhost:7687", auth=("neo4j", "Neo4j@123"), name="usroadnetworkdb")
if graph:
print("Connected to Neo4j")
else:
print("Not Connected")
Connected to Neo4j
4. Analyze the centrality of intersections using the Betweenness Centrality algorithm()
Top 20 Intersections by Betweenness Centrality
In [6]:
# Query betweenness scores
query = """
CALL gds.betweenness.stream('usRoadGraph', { samplingSize: 1000 })
YIELD nodeId, score
RETURN gds.util.asNode(nodeId).id AS intersectionId, score
ORDER BY score DESC
LIMIT 20;
"""
df = pd.DataFrame(graph.run(query).data())
print(df)
intersectionId score 0 9233 1.659771e+07 1 1939 1.655155e+07 2 1917 1.653462e+07 3 1921 1.649183e+07 4 1920 1.649179e+07 5 10666 1.645009e+07 6 1916 1.642931e+07 7 1906 1.642927e+07 8 8509 1.601949e+07 9 1903 1.599096e+07 10 1902 1.599087e+07 11 10767 1.581709e+07 12 10769 1.578116e+07 13 8978 1.555402e+07 14 8979 1.555399e+07 15 8980 1.555395e+07 16 10875 1.225606e+07 17 11009 1.204013e+07 18 10993 1.202951e+07 19 10880 1.201712e+07
Pie chart to visuilze the Top 20 Intersections by Betweenness Centrality
In [7]:
plt.figure(figsize=(10,6))
plt.bar(df["intersectionId"].astype(str), df["score"])
plt.xlabel("Intersection ID")
plt.ylabel("Betweenness Centrality Score")
plt.title("Top 20 Intersections by Betweenness Centrality")
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()
In [30]:
# ================================
# Neo4j → Plotly (Dynamic) using py2neo Graph
# - Q5 Dashboard (Totals + Degree Distribution)
# - Q6 Degree Distribution Bar Chart
# - Q7 Top 10 Most Connected Intersections
# - Q8 Intersection Categories by Degree
#
# Assumption: you ALREADY have `graph` defined, e.g.:
# from py2neo import Graph
# graph = Graph("bolt://localhost:7687", auth=("neo4j","PASSWORD"), name="usroadnetworkdb")
# ================================
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
# -------------------------------
# Cypher queries (Neo4j 5+ compatible)
# -------------------------------
# Total intersections
Q_TOTAL_INTERSECTIONS = """
MATCH (i:Intersection)
RETURN count(i) AS totalIntersections;
"""
# Total roads (UNIQUE physical roads, lecture-aligned)
Q_TOTAL_ROADS_UNIQUE = """
MATCH (a:Intersection)-[:ROAD]-(b:Intersection)
WITH CASE WHEN a.id < b.id THEN [a.id, b.id] ELSE [b.id, a.id] END AS road
RETURN count(DISTINCT road) AS totalRoads;
"""
# Degree distribution (for Q6 and dashboard)
Q_DEGREE_DISTRIBUTION = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
RETURN degree, count(*) AS frequency
ORDER BY degree;
"""
# Top 10 most connected intersections (Q7)
Q_TOP10_DEGREE = """
MATCH (i:Intersection)-[:ROAD]-()
WITH i, count(*) AS degree
RETURN i.id AS intersectionId, degree
ORDER BY degree DESC
LIMIT 10;
"""
# Categories by degree (Q8)
Q_DEGREE_CATEGORIES = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
WITH CASE
WHEN degree <= 2 THEN 'Low Connectivity'
WHEN degree <= 4 THEN 'Medium Connectivity'
ELSE 'High Connectivity'
END AS category
RETURN category, count(*) AS numberOfIntersections
ORDER BY numberOfIntersections DESC;
"""
# Average degree (optional check)
Q_AVG_DEGREE = """
MATCH (i:Intersection)
RETURN avg(COUNT { (i)-[:ROAD]-() }) AS avgDegree;
"""
# -------------------------------
# Helpers
# -------------------------------
def scalar(query: str):
"""Return a single scalar value from a Cypher query."""
return graph.run(query).evaluate()
def df(query: str) -> pd.DataFrame:
"""Return a DataFrame from a Cypher query."""
return graph.run(query).to_data_frame()
# -------------------------------
# Fetch data dynamically
# -------------------------------
total_intersections = scalar(Q_TOTAL_INTERSECTIONS)
total_roads_unique = scalar(Q_TOTAL_ROADS_UNIQUE)
avg_degree = scalar(Q_AVG_DEGREE) # optional (e.g., ~2.8)
degree_dist_df = df(Q_DEGREE_DISTRIBUTION)
top10_df = df(Q_TOP10_DEGREE)
categories_df = df(Q_DEGREE_CATEGORIES)
# ================================
# Q5: SIMPLE DASHBOARD (Totals + Degree Distribution)
# ================================
dashboard = make_subplots(
rows=2, cols=2,
specs=[[{"type": "indicator"}, {"type": "indicator"}],
[{"colspan": 2}, None]],
subplot_titles=("Total Intersections", "Total Roads (Unique)", "Degree Distribution")
)
dashboard.add_trace(
go.Indicator(mode="number", value=total_intersections),
row=1, col=1
)
dashboard.add_trace(
go.Indicator(mode="number", value=total_roads_unique),
row=1, col=2
)
dashboard.add_trace(
go.Bar(x=degree_dist_df["degree"], y=degree_dist_df["frequency"]),
row=2, col=1
)
dashboard.update_layout(
title="US Road Network Overview Dashboard",
height=720,
showlegend=False
)
dashboard.show()
# ================================
# Q6: DEGREE DISTRIBUTION BAR CHART (standalone)
# ================================
fig_q6 = px.bar(
degree_dist_df,
x="degree",
y="frequency",
labels={"degree": "Number of Roads (Degree)", "frequency": "Number of Intersections"},
title="Q6: Degree Distribution of Intersections"
)
fig_q6.update_layout(xaxis=dict(tickmode="linear"))
fig_q6.show()
# ================================
# Q7: TOP 10 MOST CONNECTED INTERSECTIONS (standalone)
# ================================
fig_q7 = px.bar(
top10_df,
x="intersectionId",
y="degree",
labels={"intersectionId": "Intersection ID", "degree": "Number of Connected Roads"},
title="Q7: Top 10 Most Connected Intersections"
)
fig_q7.update_layout(xaxis=dict(type="category"))
fig_q7.show()
# ================================
# Q8: INTERSECTION CATEGORIES BY DEGREE (standalone)
# ================================
fig_q8 = px.bar(
categories_df,
x="category",
y="numberOfIntersections",
labels={"category": "Connectivity Category", "numberOfIntersections": "Number of Intersections"},
title="Q8: Intersection Categories by Degree"
)
fig_q8.show()
# ================================
# Optional: print quick sanity checks
# ================================
print("Total Intersections:", total_intersections)
print("Total Roads (Unique):", total_roads_unique)
print("Average Degree:", avg_degree)
Total Intersections: 87575 Total Roads (Unique): 121491 Average Degree: 2.774558949471861
6.Degree Distribution Bar Chart
In [31]:
# Cypher query (Degree Distribution)
query = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
RETURN degree, count(*) AS frequency
ORDER BY degree
"""
# ----------------------------------
# Run query using graph (no session)
# ----------------------------------
df = graph.run(query).to_data_frame()
# ----------------------------------
# Plotly bar chart
# ----------------------------------
fig = px.bar(
df,
x="degree",
y="frequency",
labels={
"degree": "Number of Roads (Degree)",
"frequency": "Number of Intersections"
},
title="Degree Distribution of Intersections in the US Road Network"
)
fig.update_layout(
xaxis=dict(tickmode="linear"),
bargap=0.2
)
fig.show()
7. Top 10 Most Connected Intersections:
In [32]:
# Run Cypher query dynamically
# ----------------------------------
query = """
MATCH (i:Intersection)-[:ROAD]-()
WITH i, count(*) AS degree
RETURN i.id AS intersectionId, degree
ORDER BY degree DESC
LIMIT 10
"""
df = graph.run(query).to_data_frame()
# ----------------------------------
# Plotly bar chart
# ----------------------------------
fig = px.bar(
df,
x="intersectionId",
y="degree",
labels={
"intersectionId": "Intersection ID",
"degree": "Number of Connected Roads"
},
title="Top 10 Most Connected Intersections in the US Road Network"
)
fig.update_layout(
xaxis=dict(type="category"),
bargap=0.3
)
fig.show()
8. Intersection Categories by Degree
In [33]:
# Run query dynamically from db
query = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
WITH
CASE
WHEN degree <= 2 THEN 'Low Connectivity'
WHEN degree <= 4 THEN 'Medium Connectivity'
ELSE 'High Connectivity'
END AS category
RETURN category, count(*) AS numberOfIntersections
ORDER BY numberOfIntersections DESC
"""
df = graph.run(query).to_data_frame()
# Bar chart
fig = px.bar(
df,
x="category",
y="numberOfIntersections",
labels={
"category": "Intersection Connectivity Category",
"numberOfIntersections": "Number of Intersections"
},
title="Intersection Categories by Degree in the US Road Network"
)
fig.show()
Pie Chart: Road Network Composition
Bar Chart: Road Network Metrics
In [11]:
import plotly.graph_objects as go
# ---------------------------------
# Get values from Neo4j
# ---------------------------------
intersections = graph.run("""
MATCH (i:Intersection)
RETURN count(i)
""").evaluate()
roads = graph.run("""
MATCH (a:Intersection)-[:ROAD]-(b:Intersection)
WITH CASE WHEN a.id < b.id THEN [a.id, b.id] ELSE [b.id, a.id] END AS road
RETURN count(DISTINCT road)
""").evaluate()
# ---------------------------------
# Draw bar chart
# ---------------------------------
fig = go.Figure(
data=[
go.Bar(
x=["Intersections", "Roads"],
y=[intersections, roads],
text=[intersections, roads],
textposition="auto",
marker_color=["steelblue", "darkorange"]
)
]
)
fig.update_layout(
title="US Road Network Metrics",
xaxis_title="Metric",
yaxis_title="Count"
)
fig.show()
6. Degree Distribution Bar Chart
In [38]:
# Query degree distribution
with driver.session(database="road-network-db") as session:
results = session.run("""
MATCH (i:Intersection)-[r]-()
WITH i, count(r) AS degree
RETURN degree, count(*) AS frequency
ORDER BY degree
""")
df = pd.DataFrame([dict(record) for record in results])
print(df) # Shows degree vs frequency table
# Plot degree distribution
fig = px.bar(df, x="degree", y="frequency",
labels={"degree": "Node Degree (# of roads)", "frequency": "Number of Intersections"},
title="Degree Distribution of Intersections")
fig.show()
degree frequency 0 2 258 1 4 236 2 6 238 3 8 312 4 10 289 .. ... ... 70 144 2 71 146 2 72 148 1 73 150 1 74 158 1 [75 rows x 2 columns]
7. Top 10 Most Connected Intersections Visualization
In [37]:
# Query top 10 intersections and build DataFrame directly
with driver.session(database="road-network-db") as session:
results = session.run("""
MATCH (i:Intersection)-[r]-()
WITH i, count(r) AS degree
ORDER BY degree DESC
LIMIT 10
RETURN i.id AS intersection_id, degree
""")
df = pd.DataFrame([dict(record) for record in results])
print(df) # Shows the top 10 intersections with their degree
# Plot bar chart
fig = px.bar(df, x="intersection_id", y="degree",
labels={"intersection_id": "Intersection ID", "degree": "Degree (# of roads)"},
title="Top 10 Most Connected Intersections")
fig.show()
intersection_id degree 0 2804 158 1 2803 150 2 3195 148 3 2807 146 4 2832 146 5 2799 144 6 3022 144 7 2806 140 8 1640 138 9 1627 138
8. Intersection Categories by Degree
In [36]:
# Query intersection categories and build DataFrame directly
with driver.session(database="road-network-db") as session:
results = session.run("""
MATCH (i:Intersection)-[r]-()
WITH i, count(r) AS degree
WITH CASE
WHEN degree <= 2 THEN 'Low'
WHEN degree <= 4 THEN 'Medium'
ELSE 'High'
END AS category
RETURN category, count(*) AS intersections
ORDER BY category
""")
df = pd.DataFrame([dict(record) for record in results])
print(df) # Shows category vs number of intersections
# Plot bar chart
fig = px.bar(df, x="category", y="intersections",
labels={"category": "Connectivity Category", "intersections": "Number of Intersections"},
title="Intersection Categories by Degree")
fig.show()
category intersections 0 High 9069 1 Low 258 2 Medium 236
9. Degree Distribution Bar Chart( Duplicated )
In [35]:
import pandas as pd
# Degree distribution query (Neo4j 5+ compatible)
query = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
RETURN degree, count(*) AS frequency
ORDER BY degree
"""
# Run query using py2neo Graph (NO driver/session)
df = graph.run(query).to_data_frame()
# View results
df.head()
Out[35]:
| degree | frequency | |
|---|---|---|
| 0 | 1 | 2234 |
| 1 | 2 | 35486 |
| 2 | 3 | 29941 |
| 3 | 4 | 19652 |
| 4 | 5 | 227 |
Dashboard Summary
In [32]:
Q_TOTAL_INTERSECTIONS = """
MATCH (i:Intersection)
RETURN count(i) AS totalIntersections;
"""
Q_TOTAL_ROADS = """
MATCH (a:Intersection)-[:ROAD]-(b:Intersection)
WITH CASE WHEN a.id < b.id THEN [a.id, b.id] ELSE [b.id, a.id] END AS road
RETURN count(DISTINCT road) AS totalRoads;
"""
Q_DEGREE_DIST = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
RETURN degree, count(*) AS frequency
ORDER BY degree;
"""
Q_TOP10 = """
MATCH (i:Intersection)-[:ROAD]-()
WITH i, count(*) AS degree
RETURN i.id AS intersectionId, degree
ORDER BY degree DESC
LIMIT 10;
"""
Q_CATEGORIES = """
MATCH (i:Intersection)
WITH COUNT { (i)-[:ROAD]-() } AS degree
WITH CASE
WHEN degree <= 2 THEN 'Low'
WHEN degree <= 4 THEN 'Medium'
ELSE 'High'
END AS category
RETURN category, count(*) AS count
ORDER BY count DESC;
"""
# -------------------------------
# Fetch data
# -------------------------------
total_intersections = graph.run(Q_TOTAL_INTERSECTIONS).evaluate()
total_roads = graph.run(Q_TOTAL_ROADS).evaluate()
degree_df = graph.run(Q_DEGREE_DIST).to_data_frame()
top10_df = graph.run(Q_TOP10).to_data_frame()
cat_df = graph.run(Q_CATEGORIES).to_data_frame()
# -------------------------------
# ONE COMBINED DASHBOARD
# -------------------------------
fig = make_subplots(
rows=3, cols=2,
specs=[
[{"type": "indicator"}, {"type": "indicator"}],
[{"colspan": 2}, None],
[{"type": "bar"}, {"type": "bar"}]
],
subplot_titles=(
"Total Intersections",
"Total Roads (Unique)",
"Degree Distribution",
"Top 10 Most Connected Intersections",
"Intersection Categories by Degree"
)
)
# Indicators
fig.add_trace(go.Indicator(mode="number", value=total_intersections), row=1, col=1)
fig.add_trace(go.Indicator(mode="number", value=total_roads), row=1, col=2)
# Degree distribution
fig.add_trace(
go.Bar(x=degree_df["degree"], y=degree_df["frequency"]),
row=2, col=1
)
# Top 10
fig.add_trace(
go.Bar(x=top10_df["intersectionId"], y=top10_df["degree"]),
row=3, col=1
)
# Categories
fig.add_trace(
go.Bar(x=cat_df["category"], y=cat_df["count"]),
row=3, col=2
)
fig.update_layout(
height=900,
title="US Road Network – Summary Visual Analytics Dashboard",
showlegend=False
)
fig.show()
In [ ]: